require(colorout)
require(sqldf)
require(arm)
require(RPostgreSQL)

remove(list=objects())
options(digits=2, scipen=9, width=110, java.parameters = "-Xrs")

################################################################################################################
# helper functions

username <- system("echo $USER", intern=TRUE)
conn <- dbConnect(dbDriver("PostgreSQL"), user=username, password="", host="localhost", port=5432, dbname=username)
SQL <- function(x) 
  dbGetQuery(conn, x)
SQL_EXECUTE <- function(x)
  system(paste0("psql -d $USER -U $USER -c '", x, "';"))

CMUSansSerif <- Type1Font(
  "CMUSansSerif", 
  c("fonts/cmuss1.afm",
    "fonts/cmuss2.afm",
    "fonts/cmuss3.afm",
    "fonts/cmuss4.afm"))
postscriptFonts(CMUSansSerif=CMUSansSerif)
pdfFonts(CMUSansSerif=CMUSansSerif)

################################################################################################################
################################################################################################################
################################################################################################################
# FIGURES 1-4, EXIT POLL COMPARISON

labels <- c("Obama 2012 Support: National", "Obama 2012 Support: Florida", "Cuyahoga County", "Virginia 7th Congressional District")
n_samples <- 100
mrp_full <- SQL(paste0("
  select 
  -- figure indicators
  1 as fig1, 
  case when a.state = 'FL' then 1 else 0 end as fig2, 
  case when a.state = 'OH' and fips = '39035' then 1 else 0 end as fig3, 
  case when a.state = 'VA' and cd = 7 then 1 else 0 end as fig4, 
  -- groups
  case when race = 'Other' then 'Asian/Other' else race end as race, 
  age, 
  gender, 
  marital, 
  case when income_num < 30000 then 'Under $30k'
       when income_num < 50000 then '$30-50k'
       when income_num < 100000 then '$50-100k'
       when income_num < 200000 then '$100-200k'
       when income_num < 250000 then '$200-250k'
       when income_num >= 250000 then '$250k or more'
       else NULL end as income, 
  count(*) as n, 
  ", paste0(paste0("avg(yhat", 1:n_samples, ") as yhat", 1:n_samples), collapse=", "), "
  from poststrat a 
  inner join final b on (a.randomid=b.randomid)
  where voted2012 = 1
  group by 1, 2, 3, 4, 5, 6, 7, 8, 9
  order by 1, 2, 3, 4, 5, 6, 7, 8, 9;
"))
mrp_full$race_age <- paste0(mrp_full$race, " ", mrp_full$age)
mrp_full$race_gender <- paste0(mrp_full$race, " ", mrp_full$gender)
mrp_full$marital_gender <- paste0(mrp_full$marital, " ", mrp_full$gender)

ord <- c(
  "White", "Black", "Hispanic", "Asian/Other", 
  "18-29", "30-44", "45-64", "65 or older", 
  "Female", "Male", 
  "Married", "Single", 
  "Under $30k", "$30-50k", "$50-100k", "$100-200k", "$200-250k", "$250k or more", 
  "White 18-29", "White 30-44", "White 45-64", "White 65 or older", 
  "Black 18-29", "Black 30-44", "Black 45-64", "Black 65 or older", 
  "Hispanic 18-29", "Hispanic 30-44", "Hispanic 45-64", "Hispanic 65 or older", 
  "Single Female", "Single Male", "Married Female", "Married Male", 
  "White Female", "White Male", 
  "Black Female", "Black Male", 
  "Hispanic Female", "Hispanic Male")

options(sqldf.driver = "SQLite")
mrp <- lapply(1:4, function(i_version) {
  mrp <- NULL
  for (x in c("race", "age", "gender", "marital", "income", "race_age", "marital_gender", "race_gender")) {
    tmp <- sqldf(paste0("
      select 
      ", x, " as grp, 
      sum(n) as n, 
      ", paste0(paste0("sum(yhat", 1:n_samples, " * n) / sum(n) as yhat", 1:n_samples), collapse=", "), "
      from mrp_full
      where fig", i_version, " = 1
      group by 1
    "))
    yhats <- tmp[, paste0("yhat", 1:n_samples)]
    tmp$p <- tmp$n / sum(tmp$n)
    tmp$lo <- apply(yhats, 1, quantile, prob=0.025)
    tmp$mu <- apply(yhats, 1, quantile, prob=0.5)
    tmp$hi <- apply(yhats, 1, quantile, prob=0.975)
    tmp <- tmp[, c("grp", "p", "lo", "mu", "hi")]
    tmp <- tmp[tmp$grp %in% ord,]
    mrp <- rbind(mrp, tmp)
  }
  rownames(mrp) <- mrp$grp
  mrp <- mrp[ord,]
  rownames(mrp) <- NULL
  return(mrp)
})

################################################################################################################
# load exit poll data

ep <- openxlsx::read.xlsx("data/2012_exit_polls.xlsx")
ep <- ep[ep$question %in% c("age", "age_race", "gender_marital", "income", "married", "race", "sex", "sex_race"),]
ep$response <- car::recode(ep$response, "
  '18-29'='18-29';
  '30-44'='30-44';
  '45-64'='45-64';
  '65 or over'='65 or older';
  'Black 18-29'='Black 18-29';
  'Black 30-44'='Black 30-44';
  'Black 45-64'='Black 45-64';
  'Black 65+'='Black 65 or older';
  'Latino 18-29'='Hispanic 18-29';
  'Latino 30-44'='Hispanic 30-44';
  'Latino 45-64'='Hispanic 45-64';
  'Latino 65+'='Hispanic 65 or older';
  'White 18-29'='White 18-29';
  'White 30-44'='White 30-44';
  'White 45-64'='White 45-64';
  'White 65+'='White 65 or older';
  'Married men'='Married Male';
  'Married women'='Married Female';
  'Non-married men'='Single Male';
  'Non-married women'='Single Female';
  '$100,000 - $199,999'='$100-200k';
  '$200,000 - $249,999'='$200-250k';
  '$250,000 or more'='$250k or more';
  '$30,000 - $49,999'='$30-50k';
  '$50,000 - $99,999'='$50-100k';
  'Under $30,000'='Under $30k';
  'No'='Single';
  'Yes'='Married';
  'Asian/Other'='Asian/Other';
  'Black'='Black';
  'Hispanic/Latino'='Hispanic';
  'White'='White';
  'Female'='Female';
  'Male'='Male';
  'Black men'='Black Male';
  'Black women'='Black Female';
  'Latino men'='Hispanic Male';
  'Latino women'='Hispanic Female';
  'White men'='White Male';
  'White women'='White Female';
  else=NA", as.factor=FALSE)

ep$y <- ep$ep_obama / (ep$ep_obama + ep$ep_romney)
ep$samp <- ep$ep_sample * ep$ep_total * (ep$ep_obama + ep$ep_romney)
ep$moe <- 1.96 * sqrt((ep$y * (1 - ep$y)) / ep$samp)

ep <- data.frame(
  state=ep$state, 
  grp=ep$response,
  p=ep$ep_total, 
  lo=ep$y - ep$moe, 
  mu=ep$y, 
  hi=ep$y + ep$moe, 
  stringsAsFactors=FALSE)
ep <- ep[complete.cases(ep),]
ep_full <- ep

ep <- list()
tmp <- ep_full[ep_full$state == "US",]
rownames(tmp) <- tmp$grp
tmp <- tmp[mrp[[1]]$grp,]
ep[[1]] <- tmp

tmp <- ep_full[ep_full$state == "FL",]
rownames(tmp) <- tmp$grp
tmp <- tmp[mrp[[2]]$grp,]
ep[[2]] <- tmp

for (i_plot in 3:4) {
  ep[[i_plot]] <- mrp[[i_plot]]
  for (i in c("p", "lo", "mu", "hi"))
    ep[[i_plot]][,i] <- NA
}

################################################################################################################
# plots

filenames <- paste0("figures/1percent_figure", 1:4, ".pdf")
for (i_plot in 1:length(labels)) {

  pdf(filenames[i_plot], width=5, height=7, family=CMUSansSerif)
  par(mfrow=c(1,1), mar=c(2,5,3,4), mgp=c(1.75,0.1,0), tck=0.005, lwd=0.5, pty="m", cex.axis=0.5, cex.main=0.8)

  mrp_tmp <- mrp[[i_plot]]
  ep_tmp <- ep[[i_plot]]

  plot(0, 0, xlim=c(0,1), ylim=c(nrow(mrp_tmp) + 1, 0), type="n", 
    axes=FALSE, xlab="", ylab="", xaxs="i", yaxs="i")
  abline(v=seq(0, 1, by=0.25), col=rgb(0.75, 0.75, 0.75), lty=3)
  abline(v=0.5, col=rgb(0.75, 0.75, 0.75))
  abline(h=1:nrow(mrp_tmp), col=rgb(0.75, 0.75, 0.75), lty=3)

  points(mrp_tmp$mu, 1:nrow(mrp_tmp) - 0.1, pch=20, cex=0.6)
  segments(mrp_tmp$lo, 1:nrow(mrp_tmp) - 0.1, mrp_tmp$hi, 1:nrow(mrp_tmp) - 0.1)
  points(ep_tmp$mu, 1:nrow(ep_tmp) + 0.1, pch=20, cex=0.6, col="dark grey")
  segments(ep_tmp$lo, 1:nrow(ep_tmp) + 0.1, ep_tmp$hi, 1:nrow(ep_tmp) + 0.1, col="dark grey")

  p1 <- paste0("(", round(100 * mrp_tmp$p), "% vs. ", round(100 * ep_tmp$p), "%)")
  p1 <- gsub("NA", "--", p1)
  p2 <- paste0("(", round(100 * mrp_tmp$p), "% vs. ")
  p3 <- paste0("(", round(100 * mrp_tmp$p), "%")

  par(cex.axis=0.6, mgp=c(1.5,0.05,0))
  axis(1, at=seq(0, 1, by=0.25), lwd=par()$lwd)
  par(cex.axis=0.5, mgp=c(1.5,0.3,0))
  axis(2, at=1:nrow(mrp_tmp), lab=mrp_tmp$grp, lwd=par()$lwd, las=2)
  axis(4, at=1:nrow(mrp_tmp), lab=p1, lwd=par()$lwd, las=2, col.axis="dark grey", lty=0)
  axis(4, at=1:nrow(mrp_tmp), lab=p2, lwd=par()$lwd, las=2, col.axis="white", font=2, lty=0)
  axis(4, at=1:nrow(mrp_tmp), lab=p2, lwd=par()$lwd, las=2, lty=0)
  axis(4, at=1:nrow(mrp_tmp), lab=p3, lwd=par()$lwd, las=2, col.axis="white", font=2, lty=0)
  axis(4, at=1:nrow(mrp_tmp), lab=p3, lwd=par()$lwd, las=2)
  axis(4, at=0, lab=expression(underline("Group Size")), las=2)
  box(lwd=par()$lwd)

  par(xpd=TRUE)
  legend("topright", c("MRP", "Exit Poll"), fill=c("black", "dark grey"), 
    box.lty=0, border=NA, cex=0.5, inset=c(0,-0.06))
  par(xpd=FALSE)

  title(paste0("\n", labels[i_plot]), adj=0)
  dev.off()

}
